* generate the final dataset for main analysis
*** OUTLINE ***
* prelims
* prepare data
* generate new variables at the spell level
* aggregate at the non-employment spell level
* close	
***************
********************************************************************************
* prelims																	   
********************************************************************************
{
* start from a clean session: close any open log, wipe memory, and switch off
* the --more-- pauses
capture log close
clear all
set more off
}
********************************************************************************
* prepare data														   
********************************************************************************
{
* Assemble the affiliation-spell file: start from the affiliation records and
* attach every auxiliary source. The spell-gap and birth-date merges keep
* matched rows only; every other merge keeps all master rows and discards
* rows that exist only in the using file.

* load affiliation data
use "$data/mcvl2004-17_02affi_rest_age_un.dta", clear
* relevant variables
keep id affstart affend reltype contregime contract partialcoeff

* add spell information (start/end); records without a spell are dropped
merge m:1 id affstart affend using "$data/int/spellgaps_nover.dta", ///
	keepusing(start end) keep(match) nogenerate

* add UI left/gen information
* using-only rows are discarded here like in the other merges of this section:
* they would otherwise enter the data with missing start, end and reltype
merge m:1 id affstart affend using "$data/int/ui_entit.dta", ///
	keepusing(ui_left_start) keep(master match) nogenerate

* identify spells from workers 'fijos discontinuos'
merge m:1 id affstart affend using "$data/int/fijosd_spells.dta", ///
	keepusing(fijod_spell) keep(master match) nogenerate
* the flag only applies to relation types 753/754; unmatched rows stay missing
replace fijod_spell=0 if (reltype != 753 & reltype != 754) & fijod_spell != .

* add personal information, matched on the calendar year the spell starts
g year = year(start)
merge m:1 id year using "$data/mcvl2004-17_01pers_rest_age_un_long.dta", ///
	keepusing(gender educ residence) keep(master match) nogenerate

* add birth information; workers without a birth date are dropped
merge m:1 id using "$data/int/birthdate.dta", ///
	keepusing(bday) keep(match) nogenerate

* add pensions (DI) information
merge m:1 id using "$data/int/pensions.dta", ///
	keepusing(retdate disdate) keep(master match) nogenerate
* retirement dates after 1 Jan 2018 are treated as not observed
replace retdate = . if retdate>mdy(1,1,2018)

* add cohabitants info
merge m:1 id year using "$data/int/n_cohabitants.dta", ///
	keepusing(cohab) keep(master match) nogenerate

* add industry information (all variables of the using file are brought in)
merge m:1 id affstart affend using "$data/int/industry_spell.dta", ///
	keep(master match) nogenerate

* one record per affiliation spell; force keeps an arbitrary duplicate
duplicates drop id affstart affend, force
}
********************************************************************************
* generate new variables at the spell level
********************************************************************************
{
	
* Worker characteristics derived from the personal file. Each indicator is set
* to missing when its source variable is missing.

* urban: residence code that is not an exact multiple of 1000
* (presumably multiples of 1000 are province-level codes without a
* municipality - TODO confirm against the MCVL documentation)
g urban = (int(residence/1000)!=residence/1000)
replace urban = . if residence == . | residence == 0
la var urban "Urban"

* prov: first two digits of the residence code, left-padded to five digits
tostring residence, g(t_prov)
replace t_prov = "0" + t_prov if length(t_prov)==4
g prov = substr(t_prov,1,2)
destring prov, replace
drop t_prov residence
la var prov "Province"

* highschool: education code of 40 or more, excluding code 99
g highschool = (educ>=40 & educ!= 99)
replace highschool = . if educ == .
la var highschool "High School"
drop educ

* male
g male = (gender == 1)
* a missing gender must not be coded as 0 ("Female"); this mirrors the
* treatment of missing values in urban, highschool and family
replace male = . if gender == .
la var male "Male"
la def male 1 "Male" 0 "Female"
la val male male
drop gender

* DI
la var disdate "DI pension date"

* family: more than one cohabitant
g family = (cohab > 1)
replace family = . if cohab == .
la var family "Family"

* censoring flag: affiliation end on or after the cut-off held in `censdate'
* NOTE(review): the cut-off is typed as the plain number 20171231; it only
* corresponds to 31 Dec 2017 if affend is stored as a yyyymmdd number rather
* than as a Stata daily date - confirm against the affiliation file
local censdate 20171231
generate censored = (affend >= `censdate')
label variable censored "Censored spell"

* labour-market state of each spell, from the relation type and, for types
* 753/754, the fijos-discontinuos flag; codes 6 and 7 are only used for exit
* states assigned later
generate state = 1 if !inrange(reltype, 751, 756) & reltype != .
replace state = 2 if inlist(reltype, 751, 752)
replace state = 3 if inlist(reltype, 753, 754) & fijod_spell == 0
replace state = 4 if inlist(reltype, 753, 754) & fijod_spell == 1
replace state = 5 if inlist(reltype, 755, 756)
label define state 1 "Empl" 2 "UI" 3 "52/55 subs" 4 "U FijoD" 5 "Other UA" 6 "Retirement" 7 "OLF"
label values state state
label variable state "state identifier"
drop fijod_spell

* identify previous and next state
* A single, fully specified ordering is used for every lag and lead in this
* block. sort orders tied observations at random, and ranking spells by
* (id start) for the previous state but by (id start end) for the next state
* lets spells tied on start be ranked differently in the two steps.
* affstart/affend break any remaining ties (duplicates on id-affstart-affend
* were dropped above), so the result is reproducible across runs.
sort id start end affstart affend

* previous state: state of the preceding spell of the same worker
by id: gen prevstate = state[_n-1]
label val prevstate state
la var prevstate "Previous state"

* next state: state of the following spell of the same worker
by id: gen exitstate = state[_n+1]

* retirement (6): no following spell and the spell ends on or after the
* retirement date; ends up to 15 days before that date count as on the date
g t = end - retdate
replace t = 0 if t >=-15 & t <0 & t !=.
replace exitstate = 6 if exitstate==. & retdate !=. & t >=0
drop t
drop retdate

* out of the labour force (7): more than 365 days until the next spell, or no
* next spell at all for an uncensored spell; censored spells have no exit state
by id: g gap = start[_n+1] - end
replace exitstate = 7 if gap > 365 &  gap !=.
replace exitstate = 7 if exitstate ==. & censored == 0
replace exitstate = . if censored == 1
la var exitstate "Next state"
label val exitstate state
la var gap "Gap to next spell"

* flag spells under contribution regimes 3 to 6
generate excl = inrange(contregime, 3, 6)
label variable excl "spell in agrarian, sea or mining sector"

* carry the flag of the preceding spell forward; employment spells get 0
sort id start
bysort id: generate excl_u = excl[_n-1]
replace excl_u = 0 if state == 1
label variable excl_u "Unemployment spell following job in agrarian, sea or mining"

* spell length in days, both end points included
generate length = end - start + 1
label variable length "Length of the spell"

* age in years (365-day years) when the spell starts
generate age = (start - bday)/365
label variable age "Age at the start of the spell"

* identify nonemployment spells
* Builds uspell_count, a within-worker counter that is constant over all
* consecutive records belonging to the same non-employment spell.
sort id start
* t = 1 on every record that is not employment ...
g t=(state!=1)
* ... and is reset to 0 when the record continues a spell already under way:
* the previous state is not employment and the preceding record did not exit
* to employment (1), retirement (6) or out of the labour force (7)
bys id: replace t=0 if t==1 & prevstate!=1 & exitstate[_n-1]!=1 & exitstate[_n-1]!=6 & exitstate[_n-1]!=7
* running sum of t within worker; the first record of a worker stays at 0
* because the sum only starts at the second record
gen x = 0 
bys id: replace x = x[_n-1] + t if _n>1
rename x uspell_count
* employment records carry no spell identifier
replace uspell_count = . if state == 1
la var uspell_count "Unemployment spell identifier"
drop t

* entry into non-employment: earliest start among the records of the spell
sort id start
bysort id uspell_count: egen u_entry_date = min(start) if uspell_count != .
format u_entry_date %td
label variable u_entry_date "Unemployment entry date"

* days between the end of this record and the start of the next one, for
* records that exit out of the labour force and are followed by employment
sort id start end
bysort id: generate daysOLF = start[_n+1] - end if (exitstate == 7 & state[_n+1] == 1)
label variable daysOLF "days OLF after spell"

* write the spell-level file
order id start end prevstate state exitstate ui_left_start prov
sort id start end
save "$data/int/analysis_dataset_spell.dta", replace
}
********************************************************************************
* aggregate at the non-employment spell level
********************************************************************************
{
* reload the spell-level file written above
use "$data/int/analysis_dataset_spell.dta", clear

* number the spells of each worker chronologically and declare the panel, so
* that lag/lead operators refer to the previous/next spell
sort id start
bysort id: generate spellnum = _n
xtset id spellnum

	* generate agg vars at the non-employment spell level
	****************************************************************************
	{
* Temporary record-level variables (prefix t_) built with lag/lead operators on
* the (id, spellnum) panel; they are collapsed to the spell level further down.

* start of the following record when that record is employment
generate t_startnextemp = F.start if F.state == 1

* industry of the preceding record (missing industry recoded to 0 first)
replace industry = 0 if industry == .
sort id spellnum
generate t_last_industry = L.industry

* part-time coefficient of the preceding and following record
generate t_last_partialcoeff = L.partialcoeff
generate t_next_partialcoeff = F.partialcoeff

* contract of the preceding record
generate t_last_contract = L.contract

* contract of the following record
generate t_next_contract = F.contract

* industry of the following record
generate t_next_industry = F.industry
drop industry

* length of the preceding record, overall and only when it was employment
generate dur = end - start
generate t_durlastspell = L.dur
generate t_durlastjob = L.dur if prevstate == 1

* length of the following record; if that record is censored it is measured
* up to 31 Dec 2017, and it is kept only when this record exits to employment
generate t_durnextjob = F.dur
replace t_durnextjob = mdy(12,31,2017) - F.start if F.censored == 1
replace t_durnextjob = . if exitstate != 1
drop dur

* total days spent in UI (state 2), the 52/55 subsidy (3) and other UA (5)
* within each non-employment spell
local name2 totui
local name3 totsub
local name5 totua
foreach s in 2 3 5 {
	* days of the record if it is in state `s', zero otherwise
	generate t = cond(state == `s' & length != ., length, 0)
	bysort id uspell_count: egen `name`s'' = total(t)
	drop t
}
drop length

* top-code the subsidy at 1500 days and other UA at 900 days
replace totsub = 1500 if totsub > 1500 & totsub != .
replace totua = 900 if totua > 900 & totua != .

label variable totui "Total number of days in UI"
label variable totsub "Total number of days in the 52/55yo subsidy"
label variable totua "Total number of days in Other UA"

** from here on only non-employment records are kept
drop if state == 1
drop spellnum

* start of the next employment spell: earliest lead start within the spell
bysort id uspell_count: egen startnextemp = min(t_startnextemp)
format startnextemp %td
label variable startnextemp "start date of next employment (if any)"
drop t_startnextemp

* UI exhaustion date plus 30 days: earliest value of
* (record start + UI days left at start + 30) within the spell
tempvar uiend
generate `uiend' = start + ui_left_start + 30
bysort id uspell_count: egen ui_end_plus30 = min(`uiend')
drop `uiend'

* exit from non-employment: latest end date within the spell
bysort id uspell_count: egen u_exit_date = max(end)
label variable u_exit_date "Non-employment exit date"
format u_exit_date %td

* number of cohabitants in the calendar year of UI exhaustion; it replaces the
* value merged earlier on the year the record starts
capture drop cohab
capture drop year
generate year = year(ui_end_plus30)
merge m:1 id year using "$data/int/n_cohabitants.dta", ///
	keepusing(cohab) keep(master match) nogenerate
drop year
label variable cohab "cohabitants at UI exhaustion"

* Spell-level indicators and dates for each kind of exit. Pattern used four
* times: flag the records of interest, take the spell maximum of the flag, and
* take the spell minimum of the relevant date over the flagged records.
tempvar hit when

* 52/55 subsidy: any record in state 3, and the first start date of one
generate `hit' = (state == 3)
bysort id uspell_count: egen sub = max(`hit')
drop `hit'
label variable sub "52/55yo Subsidy"
generate `when' = start if state == 3
bysort id uspell_count: egen sub_date = min(`when')
drop `when'
label variable sub_date "52/55yo Subsidy - entry date"

* other unemployment assistance: any record in state 4 or 5
generate `hit' = (state == 4 | state == 5)
bysort id uspell_count: egen ua = max(`hit')
drop `hit'
label variable ua "Other UA"
generate `when' = start if (state == 4 | state == 5)
bysort id uspell_count: egen ua_date = min(`when')
drop `when'
label variable ua_date "Other UA - entry date"

* exit from the labour force: any record exiting to retirement (6) or OLF (7)
generate `hit' = (exitstate == 6 | exitstate == 7)
bysort id uspell_count: egen exitlabor = max(`hit')
drop `hit'
label variable exitlabor "Exit Labor Force"
generate `when' = u_exit_date if (exitstate == 6 | exitstate == 7)
bysort id uspell_count: egen exitlabor_date = min(`when')
drop `when'
label variable exitlabor_date "Exit Labor force - date"

* exit to employment: any record exiting to state 1
generate `hit' = (exitstate == 1)
bysort id uspell_count: egen exitempl = max(`hit')
drop `hit'
label variable exitempl "Exit to Empl"
generate `when' = u_exit_date if exitstate == 1
bysort id uspell_count: egen exitempl_date = min(`when')
drop `when'
label variable exitempl_date "Exit to Empl - date"

* show the four dates as daily dates
format exitempl_date sub_date ua_date exitlabor_date %td

* days out of the labour force: largest record-level value within the spell
tempvar recOLF
rename daysOLF `recOLF'
bysort id uspell_count: egen daysOLF = max(`recOLF')
drop `recOLF'
label variable daysOLF "Days OLF"

* spell-level censoring: the spell runs to 31 Dec 2017 or beyond
capture drop censored
generate censored = (u_exit_date >= mdy(12,31,2017))
label variable censored "Spell is censored"

* Characteristics of the job before and after the non-employment spell.
* "last" values are read off the first record of the spell (start == xmin),
* "next" values off its last record (start == xmax), then spread to all
* records of the spell with a spell mean.
bysort id uspell_count: egen xmax = max(start) if uspell_count != .
bysort id uspell_count: egen xmin = min(start) if uspell_count != .

* contract before the spell and whether it has code 2, 4 or 5
generate t2_last_contract = t_last_contract if xmin == start
bysort id uspell_count: egen last_contract = mean(t2_last_contract)
drop t*_last_contract
label variable last_contract "Last contract"
generate last_tempcontract = inlist(last_contract, 2, 4, 5)
label variable last_tempcontract "Last temporary job"

* contract after the spell and whether it has code 2, 4 or 5
generate t2_next_contract = t_next_contract if xmax == start
bysort id uspell_count: egen next_contract = mean(t2_next_contract)
drop t*_next_contract
label variable next_contract "Next contract"
generate next_tempcont = inlist(next_contract, 2, 4, 5)
label variable next_tempcont "Found temporary job"
drop last_contract next_contract

* part-time coefficient and industry, before and after the spell
foreach v in partialcoeff industry {
	generate t2_last_`v' = t_last_`v' if xmin == start
	bysort id uspell_count: egen last_`v' = mean(t2_last_`v')
	drop t*_last_`v'

	generate t2_next_`v' = t_next_`v' if xmax == start
	bysort id uspell_count: egen next_`v' = mean(t2_next_`v')
	drop t*_next_`v'
}
label variable last_partialcoeff "Last Partial Coefficient"
label variable next_partialcoeff "Next Partial Coefficient"
label variable last_industry "Last industry"
label variable next_industry "Next industry"

* industry change, undefined when either industry is missing
generate industry_change = (next_industry != last_industry)
replace industry_change = . if (next_industry == . | last_industry == .)
label variable industry_change "Individual changes industry"

* duration of the next job: spell mean of the record-level value, with zero
* raised to one day and values above 1500 days top-coded
bysort id uspell_count: egen durnextjob = mean(t_durnextjob)
replace durnextjob = 1 if durnextjob == 0
replace durnextjob = 1500 if durnextjob > 1500 & durnextjob != .
label variable durnextjob "Duration of Next Job"
drop t_durnext*

* duration of the last job: spell mean, zero when unknown, and its log
bysort id uspell_count: egen durlastjob = mean(t_durlastjob)
replace durlastjob = 0 if durlastjob == .
label variable durlastjob "Duration of Last Job"
generate lndurlastjob = ln(durlastjob + 1)
label variable lndurlastjob "Log Duration of Last Job"
drop t_durlast*

* state of the first record of the spell
generate t_first_ustate = state if xmin == start
bysort id uspell_count: egen first_ustate = mean(t_first_ustate)
label values first_ustate state
label variable first_ustate "First state in non-employment spell"

* state of the last record of the spell
generate t_last_ustate = state if xmax == start
bysort id uspell_count: egen last_ustate = mean(t_last_ustate)
label values last_ustate state
label variable last_ustate "Last state in non-employment spell"
drop t_*_ustate* xmin xmax

* excluded-regime flag: 1 if any record of the spell carries it
rename excl_u t_excl_u
bysort id uspell_count: egen excl_u = max(t_excl_u)
drop t_excl_u
drop excl contregime

* UI entitlement in 30-day months: largest record-level value within the spell
generate t_UIentit = round(ui_left_start/30)
bysort id uspell_count: egen UIentit = max(t_UIentit)
label variable UIentit "Months of UI entitlement"
drop t_UIentit ui_left_start

	}
	* aggregate and gen add vars at the non-employment spell level
	****************************************************************************
	{
* keep one record per non-employment spell (the record tagged by egen)
sort id start
tempvar keeper
egen `keeper' = tag(id uspell_count)
keep if `keeper' == 1
drop `keeper' uspell_count

* family indicator rebuilt from the cohabitants at UI exhaustion
capture drop family
generate family = (cohab > 1)
replace family = . if cohab == .
label variable family "Family"
drop cohab

* provincial unemployment rate in the quarter the spell starts
generate year = year(u_entry_date)
generate quarter = quarter(u_entry_date)
merge m:1 year quarter prov using "$data/int/urates.dta", ///
	keep(master match) nogenerate
rename urate_prov urate_prov_qstart
rename var_urate_prov var_urate_prov_qstart
drop year quarter

* provincial unemployment rate in the quarter of UI exhaustion, plus the
* quarter itself as a %tq date
generate year = year(ui_end_plus30)
generate quarter = quarter(ui_end_plus30)
merge m:1 year quarter prov using "$data/int/urates.dta", ///
	keep(master match) nogenerate
generate quarter_UIend = yq(year, quarter)
format quarter_UIend %tq
label variable quarter_UIend "UI exhaustion quarter"
drop year quarter
rename urate_prov urate_prov_qUIend
rename var_urate_prov var_urate_prov_qUIend

** wages
* attach the wide file of monthly contributions, one row per worker
merge m:1 id using "$data/mcvl2004-17_03cont_rest_age_un_wide.dta", ///
	keep(master match) nogenerate

* monthly indices around the spell, for j = 0, 1, 2:
*   *_b_l`j' : month of job loss (spell entry) minus j
*   *_a_f`j' : month of job finding (spell exit) plus j
forvalues j = 0/2 {
	generate ym_b_l`j' = ym(year(u_entry_date), month(u_entry_date)) - `j'
	generate month_b_l`j' = month(dofm(ym_b_l`j'))
	generate year_b_l`j' = year(dofm(ym_b_l`j'))

	generate ym_a_f`j' = ym(year(u_exit_date), month(u_exit_date)) + `j'
	generate month_a_f`j' = month(dofm(ym_a_f`j'))
	generate year_a_f`j' = year(dofm(ym_a_f`j'))
}

* Days worked in the months around the spell, used below to scale monthly
* contributions to a 30-day wage.

* days worked in the month the next job starts
* daysleft_month_a_f0: days from the exit date to the first day of the
* following calendar month; days worked are capped at the next job's duration
* (a missing durnextjob compares as larger, so daysleft is used in that case)
g daysleft_month_a_f0 = (mdy(month_a_f1,1,year_a_f1) - u_exit_date)
g dayswork_month_a_f0 = daysleft_month_a_f0 if durnextjob >= daysleft_month_a_f0
replace dayswork_month_a_f0 = durnextjob if durnextjob < daysleft_month_a_f0

* days worked in the month after the next job starts: 30 if the job outlasts
* that month, the remainder if it ends within it, missing if it never reaches it
g dayswork_month_a_f1 = . if durnextjob < daysleft_month_a_f0
replace dayswork_month_a_f1 = 30 if durnextjob >= (daysleft_month_a_f0 + 30) & durnextjob!=.
replace dayswork_month_a_f1 = (durnextjob-daysleft_month_a_f0) if  durnextjob < (daysleft_month_a_f0 + 30) & daysleft_month_a_f0!=. & durnextjob > daysleft_month_a_f0

* days worked in the month the worker lost her job: days elapsed in that month
* up to and including the entry date, capped at the last job's duration
g dayspass_month_b_l0 = (u_entry_date - mdy(month_b_l0,1,year_b_l0) + 1)
g dayswork_month_b_l0 = dayspass_month_b_l0 if durlastjob >= dayspass_month_b_l0
replace dayswork_month_b_l0 = durlastjob if durlastjob < dayspass_month_b_l0

* days worked in the month prior to losing the job: 30 if the job started
* before that month, the remainder if it started within it, missing otherwise
g dayswork_month_b_l1 = . if durlastjob < dayspass_month_b_l0
replace dayswork_month_b_l1 = 30 if durlastjob >= (dayswork_month_b_l0 + 30) & durlastjob!=.
replace dayswork_month_b_l1 = (durlastjob-dayswork_month_b_l0) if  durlastjob < (dayswork_month_b_l0 + 30) & dayswork_month_b_l0!=. & durlastjob > dayswork_month_b_l0

* taking into account part time workers
* a missing or zero coefficient is set to 1000, then everything is divided by
* 1000 (presumably the coefficient is stored in thousandths of full time -
* TODO confirm against the MCVL documentation)
replace last_partialcoeff = 1000 if last_partialcoeff == . |last_partialcoeff == 0
replace next_partialcoeff = 1000 if next_partialcoeff == . |next_partialcoeff == 0
replace last_partialcoeff = last_partialcoeff/1000
replace next_partialcoeff = next_partialcoeff/1000

* wage variables, filled in by the assignment loop that follows
g lastwage = .
g nextwage = .

* assign wages
* For every calendar month (m, y) the contribution cot`m'`y' is copied to the
* spells whose reference month equals that month, scaled to 30 days worked and
* divided by the part-time coefficient. The five rules below are applied in
* order and later rules can overwrite or fill in earlier ones, so the
* statement order must not be changed.
forval m=1(1)12 {
	forval y=2005(1)2017 {

	* rule 1: the adjacent month (before loss / after finding) was worked
	* at least 28 days - use that month's contribution
	replace lastwage = (cot`m'`y')*(30/dayswork_month_b_l1)*(1/last_partialcoeff) if ym_b_l1 == ym(`y',`m') & dayswork_month_b_l1>=28 & dayswork_month_b_l1!=.
	replace nextwage = (cot`m'`y')*(30/dayswork_month_a_f1)*(1/next_partialcoeff) if ym_a_f1 == ym(`y',`m') & dayswork_month_a_f1>=28 & dayswork_month_a_f1!=. & durnextjob!=.	

	* rule 2: the job never reached the adjacent month - use the month of
	* loss/finding itself, scaled by the days worked in it
	replace lastwage = (cot`m'`y' /dayswork_month_b_l0)*30*(1/last_partialcoeff) if ym_b_l0 == ym(`y',`m') & dayswork_month_b_l1==. 
	replace nextwage = (cot`m'`y'/dayswork_month_a_f0)*30*(1/next_partialcoeff) if ym_a_f0 == ym(`y',`m') & dayswork_month_a_f1==. & durnextjob!=. 

	* rule 3: still missing and fewer than 15 days worked in the adjacent
	* month - use the month of loss/finding
	replace lastwage = (cot`m'`y' /dayswork_month_b_l0)*30*(1/last_partialcoeff) if ym_b_l0 == ym(`y',`m') & lastwage==. & dayswork_month_b_l1<15 
	replace nextwage = (cot`m'`y'/dayswork_month_a_f0)*30*(1/next_partialcoeff) if ym_a_f0 == ym(`y',`m') & nextwage==. & dayswork_month_a_f1<15 & durnextjob!=. 
	
	* rule 4: missing or zero and at least 15 days worked in the adjacent
	* month - use the adjacent month (a missing day count also passes >=15)
	replace lastwage = (cot`m'`y' /dayswork_month_b_l1)*30*(1/last_partialcoeff) if ym_b_l1 == ym(`y',`m') & (lastwage==.|lastwage==0) & dayswork_month_b_l1>=15 
	replace nextwage = (cot`m'`y'/dayswork_month_a_f1)*30*(1/next_partialcoeff) if ym_a_f1 == ym(`y',`m') & (nextwage==.|nextwage==0) & dayswork_month_a_f1>=15 & durnextjob!=.
		
	* rule 5: if still missing or 0, assign wage of entry/exit month
	replace lastwage = (cot`m'`y' /dayswork_month_b_l0)*30*(1/last_partialcoeff) if ym_b_l0 == ym(`y',`m') & (lastwage==.|lastwage==0) 
	replace nextwage = (cot`m'`y'/dayswork_month_a_f0)*30*(1/next_partialcoeff) if ym_a_f0 == ym(`y',`m') & (nextwage==.|nextwage==0) & durnextjob!=. 
	
	}
}

* adjust to euros as unit (division by 100)
replace lastwage=lastwage/100
replace nextwage=nextwage/100

* wage diff
* NOTE(review): the difference of two wages already in euros is divided by 100
* once more here, and it is computed before the contribution-base caps are
* applied. This variable is dropped and rebuilt from the real wages further
* down, so the extra division does not reach the final data.
g wagediff = nextwage-lastwage
replace wagediff=wagediff/100

* account for max and min contribution bases
* Monthly wages are bounded by the contribution bases of the year in which the
* job was lost (lastwage, year_b_l0) or found (nextwage, year_a_f0). The
* maximum is applied first; the minimum is scaled by the part-time coefficient
* and is not applied to zero or missing wages.
* The 2008 minimum is 699.9, the value used for 2008 in the state pension
* contributions later in this file (it was typed as 699.8 here).

* maximum monthly base by year
local maxbase2005 2813.4
local maxbase2006 2897.7
local maxbase2007 2996.1
local maxbase2008 3074.1
local maxbase2009 3166.2
local maxbase2010 3198
local maxbase2011 3230.1
local maxbase2012 3262.5
local maxbase2013 3425.7
local maxbase2014 3597
local maxbase2015 3606
local maxbase2016 3642
local maxbase2017 3751.2

* minimum monthly base by year
local minbase2005 598.5
local minbase2006 631.2
local minbase2007 665.7
local minbase2008 699.9
local minbase2009 728.1
local minbase2010 738.9
local minbase2011 748.2
local minbase2012 748.2
local minbase2013 753
local minbase2014 753
local minbase2015 756.6
local minbase2016 764.4
local minbase2017 825.6

foreach var in lastwage nextwage {

	* reference year and part-time coefficient that go with each wage
	if ("`var'" == "nextwage") {
		local resvar year_a_f0
		local ptime next_partialcoeff
	}
	else {
		local resvar year_b_l0
		local ptime last_partialcoeff
	}

	* maximum contribution
	forvalues yr = 2005/2017 {
		replace `var' = `maxbase`yr'' if `var' > `maxbase`yr'' & `var' != . & `resvar' == `yr'
	}

	* minimum contribution
	forvalues yr = 2005/2017 {
		replace `var' = `minbase`yr''*(`ptime') if `var' < `minbase`yr''*(`ptime') & (`var' != . & `var' != 0) & `resvar' == `yr'
	}

}

* discard spells that start before 2006
drop if year(u_entry_date) < 2006

* remove the record-level variables and the wage-construction helpers, which
* have no meaning once the data hold one row per non-employment spell
drop partialcoeff cot* dayspass_month_* dayswork_month* daysleft_month*
drop start end state exitstate affstart affend reltype ///
	contract gap prevstate ym_b_* ///
	ym_a_* year_b* year_a* month_b* month_a*

** inflation adjustment
* Each wage is multiplied by infbase2012/100 of its own reference year: the
* year the spell starts for the last wage, the year it ends for the next wage.

* last wage, indexed by the year of entry into non-employment
generate year = year(u_entry_date)
merge m:1 year using "$data/int/inflation.dta", ///
	keepusing(*2012) keep(master match) nogenerate
generate lastwageReal = lastwage * (infbase2012/100)
label variable lastwageReal "Last Wage (Real: 2012 Base)"
generate lnlastwageReal = ln(lastwageReal)
label variable lnlastwageReal "Log Last Wage (Real: 2012 Base)"
drop year infbase2012

* next wage, indexed by the year of exit from non-employment
generate year = year(u_exit_date)
merge m:1 year using "$data/int/inflation.dta", ///
	keepusing(*2012) keep(master match) nogenerate
generate nextwageReal = nextwage * (infbase2012/100)
label variable nextwageReal "Next Wage (Real: 2012 Base)"
generate lnnextwageReal = ln(nextwageReal)
label variable lnnextwageReal "Log Next Wage (Real: 2012 Base)"
drop year infbase2012

* generate real wage difference (next minus last), in levels and in logs
g wagediffReal = nextwageReal - lastwageReal
la var wagediffReal "Wage difference (Real: 2012 Base)"
g lnwagediffReal = lnnextwageReal - lnlastwageReal
la var lnwagediffReal "Log Wage difference (Real: 2012 Base)"

* real wages as default: the nominal variables are dropped and the real ones
* take over their names
drop lastwage nextwage wagediff
rename lastwageReal lastwage
rename lnlastwageReal lnlastwage
rename nextwageReal nextwage
rename lnnextwageReal lnnextwage
rename wagediffReal wagediff
rename lnwagediffReal lnwagediff

* convert the monthly wages to daily wages (30-day month) and rebuild the
* differences from them; differences are undefined without a next job
* NOTE(review): lnlastwage and lnnextwage are not recomputed here, so they
* remain logs of the monthly wage while lastwage/nextwage are daily - confirm
* this is intended
replace lastwage = lastwage/30
replace nextwage = nextwage/30
replace lnwagediff = ln(nextwage)-ln(lastwage)
replace wagediff = nextwage - lastwage
replace wagediff = . if durnextjob == .
replace lnwagediff = . if durnextjob == .

** additional variables
* corrections
* exit dates recorded as 31 Dec 2099 are replaced by 20 Jun 2018
* (presumably 2099 marks a spell still open at extraction - TODO confirm)
replace u_exit_date = mdy(6,20,2018) if u_exit_date ==mdy(12,31,2099)
* x: earliest entry into the subsidy or other UA; y flags spells where that
* entry comes before the computed UI exhaustion date
egen x = rowmin(sub_date ua_date)
g y = (x < ui_end_plus30 & ui_end_plus30 != . & (sub_date !=.|ua_date!=.))
* for flagged spells the exhaustion date is moved back to the assistance
* entry date; the variable keeps the name ui_end_plus30
g ui_end = ui_end_plus30
replace ui_end = x if y == 1 
drop ui_end_plus30
rename ui_end ui_end_plus30
format ui_end_plus30 %td
la var ui_end_plus30 "UI exhaustion date"
* entitlement months are recomputed from the corrected date for flagged spells
* with positive entitlement, and a value of 25 is recoded to 24
replace UIentit = round((ui_end_plus30-u_entry_date)/30) if y == 1  & UIentit > 0 
replace UIentit = 24 if UIentit == 25
drop x y

* age ui exhaustion, in 365-day years; the age at spell start is dropped
cap drop age
g age_uiendplus30 = (ui_end_plus30 - bday)/365
la var age_uiendplus30 "Age at UI exhaustion" 

* durations in days from entry into non-employment, both end points included
generate duration = u_exit_date - u_entry_date + 1
generate duration_to_e = (exitempl_date - u_entry_date + 1)
generate duration_to_sub = (sub_date - u_entry_date + 1)
generate duration_to_ua = (ua_date - u_entry_date + 1)
generate duration_to_o = (exitlabor_date - u_entry_date + 1)
label variable duration "Non-emp duration"
label variable duration_to_e "Duration to Employment"
label variable duration_to_sub "Duration to 52/55yo subsidy"
label variable duration_to_ua "Duration to Other UA"
label variable duration_to_o "Duration to OLF"

* artificial censoring: follow each spell for `c' months of 30 days, and never
* beyond 31 Dec 2017
local c = 50
generate censoring_date = u_entry_date + (`c' * 30)
replace censoring_date = mdy(12,31,2017) if censoring_date > mdy(12,31,2017) & censoring_date != .

* spells that outlast the window: total duration is cut at the window, and
* the durations to employment and to OLF are unobserved; the durations to the
* subsidy and to other UA are left untouched
replace duration = (censoring_date - u_entry_date) if censoring_date < u_exit_date
replace duration_to_e = . if censoring_date < u_exit_date
replace duration_to_o = . if censoring_date < u_exit_date

* spells that outlast the window count as censored ...
replace censored = 1 if censoring_date < u_exit_date & u_exit_date != .
* ... and any transition dated after the window is switched off
replace ua = 0 if censoring_date < ua_date & ua_date != .
replace sub = 0 if censoring_date < sub_date & sub_date != .
replace exitempl = 0 if censoring_date < exitempl_date & exitempl_date != .
replace exitlabor = 0 if censoring_date < exitlabor_date & exitlabor_date != .
drop censoring_date

* uncensored exits from the labour force that are followed by a job within
* `OLFwindow' days are reclassified as exits to employment; spells with no
* exit at all that are not censored are discarded
local OLFwindow = 365
replace exitempl = 1 if daysOLF != . & daysOLF <= `OLFwindow' & exitlabor == 1 & censored != 1
replace exitlabor = 0 if daysOLF != . & daysOLF <= `OLFwindow' & exitempl == 1 & censored != 1
drop if exitempl == 0 & exitlabor == 0 & censored == 0

* the days out of the labour force are added to the duration of those spells,
* and the exit-specific durations are aligned with the total duration
replace duration = duration + daysOLF if daysOLF != . & daysOLF <= `OLFwindow' & censored != 1
replace duration_to_e = duration if exitempl == 1
replace duration_to_o = duration if exitlabor == 1
drop daysOLF

* re-employment outcomes are undefined unless the spell ends in employment
foreach outcome in lnwagediff industry_change next_tempcont next_industry durnextjob {
	replace `outcome' = . if (censored == 1 | exitempl == 0 | exitlabor == 1)
}

* next job lasts less than 180 days
generate sep6mo = (durnextjob < 180)
label variable sep6mo "Sepration within 6mo"

* common sample: all re-employment outcomes are set to missing whenever any of
* the four underlying outcomes is missing
tempvar incomplete
generate `incomplete' = (lnwagediff == . | durnextjob == . | next_tempcont == . | industry_change == .)
foreach outcome in lnwagediff durnextjob sep6mo next_tempcont industry_change {
	replace `outcome' = . if `incomplete'
}
drop `incomplete'

* sample flags: UI exhaustion between 15 Jul 2011 and 14 Jul 2013, and age at
* exhaustion from 49 up to (not including) 58
generate sampleperiod = (ui_end_plus30 >= mdy(7,15,2011) & ui_end_plus30 < mdy(7,15,2013))
label variable sampleperiod "sample period"
generate sampleage = (age_uiendplus30 >= 49 & age_uiendplus30 < 58)
label variable sampleage "sample age range"

* intermediate file read back by the fiscal computations and the final step
compress
save "$data/temp/analysis_dataset.dta", replace

	}
	* compute ui/ua payments, contributions, and taxes in 50 months			   
	****************************************************************************
	{
* number of months in the follow-up window (30-day months), used below to
* build the end date of each spell's window
local c = 50

** taxes
* reshape tax data
* Output: one row per worker with, for each year 2005-2017, the summed
* ret_/retentions_ amounts split by source (_UI, _empl) and their row total
* (tot_*).
use "$data/mcvl2004-17_06fisc_rest_age_un.dta", clear
keep id ret*
* idno numbers the rows of each worker so that (id, idno) identifies a row
bys id: gen idno = _n
reshape long retentions_ ret_ retkey_, i(id idno) j(year)
drop idno ret_noDI_* ret_DI_* retentions_noDI_* retentions_DI_*
duplicates drop
drop if ret_ == . & retentions_ == . 
* NOTE(review): retkey_ is dropped here but the next line refers to retkey.
* That only runs if the file holds another variable named (or uniquely
* abbreviated by) retkey; if retkey_ was meant, the indicator has to be
* generated before this drop - confirm against the fiscal file.
drop retkey_
* UI = 1 for payments with key 3 or 4
gen UI = (retkey == 3 | retkey == 4)
collapse (sum) ret_ retentions_, by(id year UI)
* back to wide: first over years, then over the UI indicator (suffix 0 / 1)
reshape wide ret_ retentions_ , i(id UI) j(year) 
reshape wide ret* , i(id) j(UI) 
forval x = 2005/2017{
foreach var in ret_ retentions_{
* suffix 0 becomes _empl, suffix 1 becomes _UI, and tot_* is their row sum
rename `var'`x'0 `var'`x'_empl
 rename `var'`x'1 `var'`x'_UI
  egen tot_`var'`x' = rowtotal(`var'`x'_empl `var'`x'_UI)
 }
 }
save "$data/temp/mcvl2004-17_06fisc_reshaped.dta", replace 

* calculate taxes
* For every spell in the estimation sample, sums the yearly tax amounts that
* fall inside the window [u_entry_date, u_entry_date + c*30], weighting the
* first and last calendar year by the share of the year inside the window.
use "$data/temp/analysis_dataset.dta", clear

keep if sampleage==1 & sampleperiod==1

* number the spells of each worker by entry date
sort id u_entry_date 
bys id: gen n = _N
bys id: gen ui_spellcount = _n 
keep id u_entry_date ui_spellcount 

* largest number of spells of any worker, kept in a local for the loop below
egen t_maxcount = max(ui_spellcount)
local maxcount = t_maxcount[1]
drop t_maxcount

* end of the window, then one row per worker with the dates of each spell in
* columns (u_entry_date1, end_date1, u_entry_date2, ...)
gen end_date = u_entry_date +(`c'*30)
reshape wide *date, i(id) j(ui_spell)

merge 1:1 id using "$data/temp/mcvl2004-17_06fisc_reshaped.dta"
drop if _m == 2
drop _m 
* only the yearly totals (tot_*) are used from here on
drop *UI* *empl* 
duplicates drop
* one row per worker and year; years before 2008 are not used and missing
* amounts count as zero
reshape long tot_ret_ tot_retentions_, i(id ) j(year)
drop if year < 2008 
replace tot_ret_ = 0 if tot_ret_ == .
replace tot_retentions_ = 0 if tot_retentions_ == . 
forval x = 1/`maxcount' {
* day of the year of the window's start and end, as a fraction of 365
gen days_start`x'= doy(u_entry_date`x')/365
gen days_end`x' = doy(end_date`x')/365
gen y_start`x' = year(u_entry_date`x')
gen y_end`x' = year(end_date`x')
* share of the start year that lies after the entry date
replace days_start`x'=1-days_start`x'
* yearly amounts inside the window, pro-rated in its first and last year
gen tot_ret_`x' = tot_ret_ if year >= y_start`x' & year <= y_end`x' 
replace tot_ret_`x' = tot_ret_`x'*days_start`x' if year == y_start`x'
replace tot_ret_`x' = tot_ret_`x'*days_end`x' if year == y_end`x'
gen tot_retentions_`x' = tot_retentions_ if year >= y_start`x' & year <= y_end`x' 
replace tot_retentions_`x' = tot_retentions_`x'*days_start`x' if year == y_start`x'
replace tot_retentions_`x' = tot_retentions_`x'*days_end`x' if year == y_end`x'
}
* sum over years within worker, then back to one row per worker and spell
collapse (sum) tot_retentions_* tot_ret_* (first) u_entry_date* end_date*, by(id)
drop tot_retentions_ tot_ret_ 
reshape long tot_retentions_ tot_ret_ u_entry_date end_date , i(id) j(u_spell)
drop if u_entry_date == . 
* division by 100 (presumably from euro cents to euros - TODO confirm)
replace tot_retentions_ = tot_retentions_/100
replace tot_ret_ = tot_ret_/100

* days of the follow-up window that fall in each calendar year 2008-2018
forvalues y = 2008/2018 {
	local jan1 = mdy(1,1,`y')
	local dec31 = mdy(12,31,`y')

	generate totdayscont`y' = 0
	* the window covers the whole year
	replace totdayscont`y' = 365 if u_entry_date <= `jan1' & end_date >= `dec31'
	* the window starts within the year and runs past its end
	replace totdayscont`y' = `dec31' - u_entry_date if end_date >= `dec31' & u_entry_date >= `jan1' & u_entry_date < `dec31'
	* the window starts before the year and ends within it
	replace totdayscont`y' = end_date - `jan1' if end_date < `dec31' & u_entry_date < `jan1' & end_date >= `jan1'
	* the window starts and ends within the year
	replace totdayscont`y' = end_date - u_entry_date if end_date <= `dec31' & u_entry_date >= `jan1'
}

* total length of the window in days
generate totdayscont = end_date - u_entry_date

* retentions attributed to each year, in proportion to the days of the window
* that fall in it
forvalues y = 2008/2018 {
	generate totret`y' = tot_retentions_ * (totdayscont`y'/totdayscont)
}

* express every yearly amount in 2012 terms with a 2% annual rate (compounded
* forward from 2012 on, divided back before 2012) and add them up
scalar rate = 0.02
generate totretdisc = 0

forvalues y = 2008/2018 {
	generate totretdisc`y' = totret`y'

	if `y' >= 2012 {
		local diff = `y' - 2012
		replace totretdisc`y' = totretdisc`y' * (1 + rate)^(`diff')
	}
	else {
		local diff = 2012 - `y'
		replace totretdisc`y' = totretdisc`y' / (1 + rate)^(`diff')
	}

	replace totretdisc = totretdisc + totretdisc`y'
}

* keep the result for the merge with the benefit payments
compress
save "$data/temp/tottaxdisc50months.dta", replace

** benefit payments for the spells in the estimation sample
use "$data/temp/analysis_dataset.dta", clear

keep if sampleage == 1 & sampleperiod == 1

* total amount paid: days on each benefit times 14.3 per day
* (presumably the daily benefit in euros - TODO confirm the source)
generate totua_amount = totua*14.3
generate totsub_amount = totsub*14.3
label variable totua_amount "Other Ua"
label variable totsub_amount "52/55 Subsidy"

* last day on each benefit: entry date plus the days received
generate end_date_sub = sub_date + totsub
generate end_date_ua = ua_date + totua

* days on each benefit that fall in each calendar year 2008-2018
forvalues y = 2008/2018 {
	local jan1 = mdy(1,1,`y')
	local dec31 = mdy(12,31,`y')

	foreach benefit in sub ua {
		generate totdays`benefit'cont`y' = 0
		* the benefit period covers the whole year
		replace totdays`benefit'cont`y' = 365 if `benefit'_date <= `jan1' & end_date_`benefit' >= `dec31'
		* it starts within the year and runs past its end
		replace totdays`benefit'cont`y' = `dec31' - `benefit'_date if end_date_`benefit' >= `dec31' & `benefit'_date >= `jan1' & `benefit'_date < `dec31'
		* it starts before the year and ends within it
		replace totdays`benefit'cont`y' = end_date_`benefit' - `jan1' if end_date_`benefit' < `dec31' & `benefit'_date < `jan1' & end_date_`benefit' >= `jan1'
		* it starts and ends within the year
		replace totdays`benefit'cont`y' = end_date_`benefit' - `benefit'_date if end_date_`benefit' <= `dec31' & `benefit'_date >= `jan1'
	}
}

* amount attributed to each year, in proportion to the days in that year;
* a missing result (for instance from a division by zero days) is set to 0
forvalues y = 2008/2018 {
	foreach benefit in sub ua {
		generate tot`benefit'_amount`y' = tot`benefit'_amount * (totdays`benefit'cont`y'/tot`benefit')
		replace tot`benefit'_amount`y' = 0 if tot`benefit'_amount`y' == .
	}
}

* yearly reference amount for the state pension contribution, 2008 to 2018
* (taken from the MCVL documentation according to the original note)
local y = 2008
foreach base in 699.9 728.1 738.9 748.2 748.2 753 753 756.6 764.4 825.6 858.6 {
	generate statecont`y' = `base'
	local ++y
}

* pension contributions paid by the state, which apply to the subsidy only:
* 125% of the yearly amount for every complete 30-day month on the subsidy
forvalues y = 2008/2018 {
	generate totstatecont_amount`y' = floor(totdayssubcont`y'/30) * (statecont`y' * 1.25)
}

* express the yearly subsidy, other-UA and state-contribution amounts in 2012
* terms with a 2% annual rate (compounded forward from 2012 on, divided back
* before 2012) and accumulate them into one total per item
scalar rate = 0.02

generate totsubdisc = 0
generate totuadisc = 0
generate totstatecontdisc = 0

forvalues y = 2008/2018 {
	foreach item in sub ua statecont {

		generate tot`item'disc`y' = tot`item'_amount`y'

		if `y' >= 2012 {
			local diff = `y' - 2012
			replace tot`item'disc`y' = tot`item'disc`y' * (1 + rate)^(`diff')
		}
		else {
			local diff = 2012 - `y'
			replace tot`item'disc`y' = tot`item'disc`y' / (1 + rate)^(`diff')
		}

		replace tot`item'disc = tot`item'disc + tot`item'disc`y'

	}
}

* the discounted totals replace the undiscounted amounts under the same names
drop totsub_amount
drop totua_amount
rename totsubdisc totsub_amount
rename totuadisc totua_amount
rename totstatecontdisc totstatecont_amount

* attach the discounted taxes computed for the same spells
merge 1:1 id u_entry_date using "$data/temp/tottaxdisc50months.dta", ///
	keepusing(totretdisc) keep(master match) nogenerate
rename totretdisc tottaxes

* state pension contributions with a negative sign, and the net fiscal effect:
* taxes minus subsidy, other UA and state pension contributions
generate totstatecontsavings = - totstatecont_amount
generate totaleff = tottaxes - (totsub_amount + totua_amount + totstatecont_amount)

keep id u_entry_date totsub_amount totua_amount totstatecontsavings tottaxes totaleff

* variable labels
label variable totaleff "Total savings 50mo"
label variable tottaxes "Taxes raised 50mo"
label variable totstatecontsavings "Pens contr savings 50mo"
label variable totsub_amount "Spending 52/yo Sub 50mo"
label variable totua_amount "Spending Other UA 50mo"

* write the fiscal variables for the final merge
compress
save "$data/temp/temp_analysis_dataset_fiscal.dta", replace

	}
	* add variables and store														   
	****************************************************************************
	{
* spell-level data plus the fiscal variables (available for the estimation
* sample only)
use "$data/temp/analysis_dataset.dta", clear
merge 1:1 id u_entry_date using "$data/temp/temp_analysis_dataset_fiscal.dta", nogenerate

* after: UI exhausted in the year from 15 Jul 2012 (1) or in the year before
* it (0); missing outside these two years
generate after = cond(ui_end_plus30 >= mdy(7,15,2012) & ui_end_plus30 < mdy(7,15,2013), 1, ///
	cond(ui_end_plus30 >= mdy(7,15,2011) & ui_end_plus30 < mdy(7,15,2012), 0, .))

* treat: aged 52 to under 54.5 at UI exhaustion (1) or 55 to under 58 (0);
* missing for every other age
generate treat = cond(age_uiendplus30 >= 52 & age_uiendplus30 < 54.5, 1, ///
	cond(age_uiendplus30 >= 55 & age_uiendplus30 < 58, 0, .))

* running variable for rdd: days between UI exhaustion and the reference date
* 15 Jul 2012 (negative before it). The label now states the date actually
* used in the code; it previously read "Jul 12 2012".
scalar ref2012 = mdy(7,15,2012)
gen reldays = ui_end_plus30 - scalar(ref2012)
la var reldays "UI exhaustion - Jul 15 2012"

* sample restrictions:
* - spells flagged as following a job in an excluded regime
* - censored spells whose last state is UI, within the treatment/period cells
* - spells that begin or end in state 4
drop if excl_u == 1
drop if censored == 1 & last_ustate == 2 & treat != . & after != .
drop if inlist(4, first_ustate, last_ustate)
drop excl_u first_ustate last_ustate

* identifier of each province by UI-exhaustion-quarter cell
egen ProvQuarter = group(prov quarter_UIend)
label variable ProvQuarter "Prov x Quarter UI Exhaustion"

* disability pension dated on or after the entry into non-employment
generate disability = (disdate >= u_entry_date & disdate != .)
label variable disability "Disability after Unemp"

* value and variable labels (existing definitions are replaced)
capture label drop treat
label define treat 1 "Age 52-55" 0 "Control"
label values treat treat
label variable treat "Age 52-55"
capture label drop after
label define after 1 "After" 0 "Before"
label values after after
label variable after "After"
label variable id "Individual identifier"

* write the final analysis dataset
sort id u_entry_date
order id age* u_entry_date u_exit_date age* ui_end_plus30 duration* censored
compress
save "$data/analysis_dataset.dta", replace

* remove the intermediate files; a missing file is not an error
capture erase "$data/temp/analysis_dataset.dta"
capture erase "$data/temp/mcvl2004-17_06fisc_reshaped.dta"
capture erase "$data/temp/tottaxdisc50months.dta"
capture erase "$data/temp/temp_analysis_dataset_fiscal.dta"

	}
}
********************************************************************************
* close																	   
********************************************************************************
{
* close the log if one is open and leave memory empty
capture log close
clear
}
